Reading data and cleaning
az_me <- read_csv("az_mesa_2019_12_17.csv")
population<-read_csv("Population.csv")
zipcode<-read_csv("zipcode.csv")
az_me <- az_me[,c("date","time","subject_age","subject_race","subject_sex","violation","arrest_made","lng","lat","type")]
az_me<- az_me %>% filter( type=="vehicular" & subject_race !="unknown" )
az_me<-na.omit(az_me)
Preliminary Data Analysis
Trend for stopping accross mesa
graph<-az_me %>%
group_by(Year=year(date),subject_race)%>%count()
graph <- inner_join(graph,population,by=c('Year','subject_race'))
graph<-graph %>% mutate(percentage=n*100/Total)
g1<-ggplot(graph, aes(
x = Year,
y = n,
col= subject_race
)) +
geom_line() +
geom_point() +
xlab("Year") +
ylab("Total Number of cases") +
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))
g2<-ggplot(graph, aes(
x = Year,
y = percentage,
col= subject_race
)) +
geom_line() +
geom_point() +
xlab("Year") +
ylab("Percentage of population by race") +
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))
combined <- g1 + g2 & theme(legend.position = "right")
combined + plot_layout(guides = "collect")

Trend for arresting accross mesa
graph<-az_me %>% filter(arrest_made==TRUE) %>%
group_by(Year=year(date),subject_race)%>%count()
graph <- inner_join(graph,population,by=c('Year','subject_race'))
graph<-graph %>% mutate(percentage=n*100/Total)
g1<-ggplot(graph, aes(
x = Year,
y = n,
col= subject_race
)) +
geom_line() +
geom_point() +
xlab("Year") +
ylab("Total Number of cases") +
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))
g2<-ggplot(graph, aes(
x = Year,
y = percentage,
col= subject_race
)) +
geom_line() +
geom_point() +
xlab("Year") +
ylab("Percentage of population by race") +
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))
combined <- g1 + g2 & theme(legend.position = "right")
combined + plot_layout(guides = "collect")

Total number of cases: Hour-wise
az_me_age <- az_me %>%
mutate(age_level=cut(az_me$subject_age, breaks =c(15,30,50,81), labels=c('15-30','30-50','>50')))
az_me_age$subject_race=ifelse(az_me_age$subject_race=='unknown',NA,az_me_age$subject_race)
az_me_age<-na.omit(az_me_age)
graph_time<-az_me_age %>%
group_by(hour=as.factor(hour(time)),age_level)%>%count()
g2<-ggplot(na.omit(graph_time), aes(
x = hour,
y = n,
fill= age_level
)) +
geom_col() +
xlab("Hour") +
ylab("Count") +
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))+
scale_fill_brewer(palette = "Dark2")
ggplotly(g2)
Total number of cases: Day - Wise
graph_day<-az_me_age %>%
group_by(day=as.factor(weekdays(as.Date(date))),age_level)%>%count()
# graph_day$subject_race <- fct_relevel(as.factor(graph_day$subject_race),'white',after=0L)
g3<-ggplot(graph_day, aes(
x = reorder(day,n),
y = n,
fill= age_level
)) +
geom_col(width = 0.5) +
xlab("Days") +
ylab("Count")+
theme(axis.text.x = element_text(face = "bold", size = 10, angle = 45))+
scale_fill_brewer(palette = "Dark2")
ggplotly(g3)
Total number of vehicles stopped vs Reason for being stopped
violation<-az_me%>%filter(type=="vehicular") %>% group_by(violation)%>%summarise(count=n())
violation <- na.omit(violation)
violation<-violation[order(violation$count,decreasing = TRUE),]
df3<-az_me_age %>%
filter(type=="vehicular") %>%
group_by(violation,age_level) %>%
summarise(count=n())%>%
arrange(desc(count))
df3<-na.omit(df3)
df3$violation<-as.factor(df3$violation)
df4<-df3[1:15,]
g4<-ggplot(data=df4, aes(x=reorder(violation,age_level),y=count,fill=age_level)) +
geom_bar(stat = "identity",position = "dodge") +
coord_flip()+
xlab("Reason for being stopped") +
ylab("Total number of vehicles stopped")
ggplotly(g4)
Model fitting using caret package
Selecting only arrest data
az_me_age_model<-az_me %>% filter( arrest_made == TRUE)
Imputing zipcode which is generated from latitude and longitude values which will be a predictor for my model
# Code for generating zipcode is commented because it gets stuck while knitting the file. so I have written the file to directory and read in to this rmd separately
# x<-revgeo(az_me_age_model$lng, az_me_age_model$lat,output = "frame")$zip
# y<-str_replace(x, ".*\\b(\\d{5})\\b.*", "\\1")
# write.csv(y,"~/Project/zipcode.csv", row.names = FALSE)
az_me_age_model$zipcode<-zipcode$x
az_me_age_model[az_me_age_model$zipcode=="Postcode Not Found",]$zipcode=NA
az_me_age_model<-na.omit(az_me_age_model)
Dividing violations in 5 broad categories which act as response for my model
az_me_age_model$viotype <-
ifelse(
grepl("DRUGS|LIQUOR|PHONE", az_me_age_model$violation),
"distracted_driving",
ifelse(
grepl("DEVICE|LIGHT|STOP|LIGHTS|LAMPS", az_me_age_model$violation),
"running_red_light_or_without_headlight",
ifelse(
grepl(
"TURN|SPEED|MPH|FOLLOWING|AGGRESSIVE |POLICE|DRIVEWAY|JAYWALKING|PEDESTRIAN|RECKLESS|SIDEWALK|PSS|LEFT|RIGHT|SIGNAL|LANE|UNSAFE|EMERGENCY",
az_me_age_model$violation
),
"reckless_driving",
ifelse(
grepl(
"ACCIDENT|QUICK|UNATT|INFO|REPORT|DAMAGE|RELEASE",
az_me_age_model$violation
),
"leaving_scene_of_an_accident_informing",
"without_proper_documents_or_vehicle_defects"
)
)
)
)
az_me_age_model<-az_me_age_model %>%
mutate(subject_race=as.factor(subject_race),
subject_sex=as.factor(subject_sex),
viotype=as.factor(viotype),
zipcode=as.factor(zipcode))
Types of Models fitted
Multinomial Model
model<- viotype~subject_age+subject_race+subject_sex+zipcode
#Multinomial model with up sampling
ctrl <- trainControl(method = "repeatedcv",
number = 10,
sampling = "up")
set.seed(42)
model_rf_over <- caret::train(model,
data = az_me_age_model,
method = "multinom",
preProcess = c("scale", "center"),
trControl = ctrl,
tuneLength = 5)
x<-predict(model_rf_over, newdata = az_me_age_model, type = "raw")
result<- confusionMatrix(x,az_me_age_model$viotype)
result
## Confusion Matrix and Statistics
##
## Reference
## Prediction distracted_driving
## distracted_driving 22
## leaving_scene_of_an_accident_informing 7
## reckless_driving 9
## running_red_light_or_without_headlight 8
## without_proper_documents_or_vehicle_defects 12
## Reference
## Prediction leaving_scene_of_an_accident_informing
## distracted_driving 1
## leaving_scene_of_an_accident_informing 11
## reckless_driving 1
## running_red_light_or_without_headlight 2
## without_proper_documents_or_vehicle_defects 1
## Reference
## Prediction reckless_driving
## distracted_driving 28
## leaving_scene_of_an_accident_informing 31
## reckless_driving 29
## running_red_light_or_without_headlight 36
## without_proper_documents_or_vehicle_defects 24
## Reference
## Prediction running_red_light_or_without_headlight
## distracted_driving 20
## leaving_scene_of_an_accident_informing 19
## reckless_driving 16
## running_red_light_or_without_headlight 50
## without_proper_documents_or_vehicle_defects 19
## Reference
## Prediction without_proper_documents_or_vehicle_defects
## distracted_driving 112
## leaving_scene_of_an_accident_informing 52
## reckless_driving 87
## running_red_light_or_without_headlight 104
## without_proper_documents_or_vehicle_defects 169
##
## Overall Statistics
##
## Accuracy : 0.323
## 95% CI : (0.292, 0.3552)
## No Information Rate : 0.6023
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1175
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: distracted_driving
## Sensitivity 0.37931
## Specificity 0.80172
## Pos Pred Value 0.12022
## Neg Pred Value 0.94760
## Prevalence 0.06667
## Detection Rate 0.02529
## Detection Prevalence 0.21034
## Balanced Accuracy 0.59052
## Class: leaving_scene_of_an_accident_informing
## Sensitivity 0.68750
## Specificity 0.87237
## Pos Pred Value 0.09167
## Neg Pred Value 0.99333
## Prevalence 0.01839
## Detection Rate 0.01264
## Detection Prevalence 0.13793
## Balanced Accuracy 0.77993
## Class: reckless_driving
## Sensitivity 0.19595
## Specificity 0.84349
## Pos Pred Value 0.20423
## Neg Pred Value 0.83654
## Prevalence 0.17011
## Detection Rate 0.03333
## Detection Prevalence 0.16322
## Balanced Accuracy 0.51972
## Class: running_red_light_or_without_headlight
## Sensitivity 0.40323
## Specificity 0.79893
## Pos Pred Value 0.25000
## Neg Pred Value 0.88955
## Prevalence 0.14253
## Detection Rate 0.05747
## Detection Prevalence 0.22989
## Balanced Accuracy 0.60108
## Class: without_proper_documents_or_vehicle_defects
## Sensitivity 0.3225
## Specificity 0.8382
## Pos Pred Value 0.7511
## Neg Pred Value 0.4496
## Prevalence 0.6023
## Detection Rate 0.1943
## Detection Prevalence 0.2586
## Balanced Accuracy 0.5803
#For combinig the Recall Precision to calculate F1 score
get.macro.f1 <- function(cm) {
c <- cm$byClass # a single matrix is sufficient
re <- sum(c[, "Recall"]) / nrow(c)
pr <- sum(c[, "Precision"]) / nrow(c)
f1 <- 2 * ((re * pr) / (re + pr))
ac<-cm$overall["Accuracy"]
f2<-c(ac,re,pr,f1)
return(f2)
}
macro.mul <- get.macro.f1(result)
Decision tree
ctrl <- trainControl(method = "repeatedcv",
number = 10,
repeats = 3,
sampling = "up")
set.seed(42)
model_rf_over <- caret::train(model,
data = az_me_age_model,
method = "rpart",
parms = list(split = "information"),
tuneLength = 10,
trControl = ctrl)
x<-predict(model_rf_over, newdata = az_me_age_model, type = "raw")
result<-confusionMatrix(x,az_me_age_model$viotype)
result
## Confusion Matrix and Statistics
##
## Reference
## Prediction distracted_driving
## distracted_driving 52
## leaving_scene_of_an_accident_informing 1
## reckless_driving 0
## running_red_light_or_without_headlight 3
## without_proper_documents_or_vehicle_defects 2
## Reference
## Prediction leaving_scene_of_an_accident_informing
## distracted_driving 0
## leaving_scene_of_an_accident_informing 16
## reckless_driving 0
## running_red_light_or_without_headlight 0
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction reckless_driving
## distracted_driving 17
## leaving_scene_of_an_accident_informing 5
## reckless_driving 75
## running_red_light_or_without_headlight 28
## without_proper_documents_or_vehicle_defects 23
## Reference
## Prediction running_red_light_or_without_headlight
## distracted_driving 14
## leaving_scene_of_an_accident_informing 4
## reckless_driving 8
## running_red_light_or_without_headlight 87
## without_proper_documents_or_vehicle_defects 11
## Reference
## Prediction without_proper_documents_or_vehicle_defects
## distracted_driving 95
## leaving_scene_of_an_accident_informing 18
## reckless_driving 77
## running_red_light_or_without_headlight 92
## without_proper_documents_or_vehicle_defects 242
##
## Overall Statistics
##
## Accuracy : 0.5425
## 95% CI : (0.5088, 0.576)
## No Information Rate : 0.6023
## P-Value [Acc > NIR] : 0.9998
##
## Kappa : 0.371
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: distracted_driving
## Sensitivity 0.89655
## Specificity 0.84483
## Pos Pred Value 0.29213
## Neg Pred Value 0.99133
## Prevalence 0.06667
## Detection Rate 0.05977
## Detection Prevalence 0.20460
## Balanced Accuracy 0.87069
## Class: leaving_scene_of_an_accident_informing
## Sensitivity 1.00000
## Specificity 0.96721
## Pos Pred Value 0.36364
## Neg Pred Value 1.00000
## Prevalence 0.01839
## Detection Rate 0.01839
## Detection Prevalence 0.05057
## Balanced Accuracy 0.98361
## Class: reckless_driving
## Sensitivity 0.50676
## Specificity 0.88227
## Pos Pred Value 0.46875
## Neg Pred Value 0.89718
## Prevalence 0.17011
## Detection Rate 0.08621
## Detection Prevalence 0.18391
## Balanced Accuracy 0.69451
## Class: running_red_light_or_without_headlight
## Sensitivity 0.7016
## Specificity 0.8351
## Pos Pred Value 0.4143
## Neg Pred Value 0.9439
## Prevalence 0.1425
## Detection Rate 0.1000
## Detection Prevalence 0.2414
## Balanced Accuracy 0.7684
## Class: without_proper_documents_or_vehicle_defects
## Sensitivity 0.4618
## Specificity 0.8960
## Pos Pred Value 0.8705
## Neg Pred Value 0.5236
## Prevalence 0.6023
## Detection Rate 0.2782
## Detection Prevalence 0.3195
## Balanced Accuracy 0.6789
get.macro.f1 <- function(cm) {
c <- cm$byClass # a single matrix is sufficient
re <- sum(c[, "Recall"]) / nrow(c)
pr <- sum(c[, "Precision"]) / nrow(c)
f1 <- 2 * ((re * pr) / (re + pr))
ac<-cm$overall["Accuracy"]
f2<-c(ac,re,pr,f1)
return(f2)
}
macro.dt <- get.macro.f1(result)
Random Forest
ctrl <- trainControl(method = "oob",
number = 10,
sampling = "up")
set.seed(42)
model_rf_over_rf <- caret::train(model,
data = az_me_age_model,
method = "rf",
trControl = ctrl)
x<-predict(model_rf_over_rf, newdata = az_me_age_model, type = "raw")
result<-confusionMatrix(x,az_me_age_model$viotype)
result
## Confusion Matrix and Statistics
##
## Reference
## Prediction distracted_driving
## distracted_driving 58
## leaving_scene_of_an_accident_informing 0
## reckless_driving 0
## running_red_light_or_without_headlight 0
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction leaving_scene_of_an_accident_informing
## distracted_driving 0
## leaving_scene_of_an_accident_informing 16
## reckless_driving 0
## running_red_light_or_without_headlight 0
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction reckless_driving
## distracted_driving 11
## leaving_scene_of_an_accident_informing 1
## reckless_driving 126
## running_red_light_or_without_headlight 8
## without_proper_documents_or_vehicle_defects 2
## Reference
## Prediction running_red_light_or_without_headlight
## distracted_driving 6
## leaving_scene_of_an_accident_informing 3
## reckless_driving 5
## running_red_light_or_without_headlight 109
## without_proper_documents_or_vehicle_defects 1
## Reference
## Prediction without_proper_documents_or_vehicle_defects
## distracted_driving 31
## leaving_scene_of_an_accident_informing 6
## reckless_driving 38
## running_red_light_or_without_headlight 35
## without_proper_documents_or_vehicle_defects 414
##
## Overall Statistics
##
## Accuracy : 0.831
## 95% CI : (0.8044, 0.8554)
## No Information Rate : 0.6023
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7379
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: distracted_driving
## Sensitivity 1.00000
## Specificity 0.94089
## Pos Pred Value 0.54717
## Neg Pred Value 1.00000
## Prevalence 0.06667
## Detection Rate 0.06667
## Detection Prevalence 0.12184
## Balanced Accuracy 0.97044
## Class: leaving_scene_of_an_accident_informing
## Sensitivity 1.00000
## Specificity 0.98829
## Pos Pred Value 0.61538
## Neg Pred Value 1.00000
## Prevalence 0.01839
## Detection Rate 0.01839
## Detection Prevalence 0.02989
## Balanced Accuracy 0.99415
## Class: reckless_driving
## Sensitivity 0.8514
## Specificity 0.9404
## Pos Pred Value 0.7456
## Neg Pred Value 0.9686
## Prevalence 0.1701
## Detection Rate 0.1448
## Detection Prevalence 0.1943
## Balanced Accuracy 0.8959
## Class: running_red_light_or_without_headlight
## Sensitivity 0.8790
## Specificity 0.9424
## Pos Pred Value 0.7171
## Neg Pred Value 0.9791
## Prevalence 0.1425
## Detection Rate 0.1253
## Detection Prevalence 0.1747
## Balanced Accuracy 0.9107
## Class: without_proper_documents_or_vehicle_defects
## Sensitivity 0.7901
## Specificity 0.9913
## Pos Pred Value 0.9928
## Neg Pred Value 0.7572
## Prevalence 0.6023
## Detection Rate 0.4759
## Detection Prevalence 0.4793
## Balanced Accuracy 0.8907
get.macro.f1 <- function(cm) {
c <- cm$byClass # a single matrix is sufficient
re <- sum(c[, "Recall"]) / nrow(c)
pr <- sum(c[, "Precision"]) / nrow(c)
f1 <- 2 * ((re * pr) / (re + pr))
ac<-cm$overall["Accuracy"]
f2<-c(ac,re,pr,f1)
return(f2)
}
macro.rand <- get.macro.f1(result)
Neural Net
ctrl <- trainControl(method = "repeatedcv",
number = 10,
verboseIter = FALSE,
sampling = "up")
set.seed(42)
model_rf_over <- caret::train(model,
data = az_me_age_model,
method = "nnet",
maxit = 1000,
trControl = ctrl)
x<-predict(model_rf_over, newdata = az_me_age_model, type = "raw")
result<-confusionMatrix(x,az_me_age_model$viotype)
result
## Confusion Matrix and Statistics
##
## Reference
## Prediction distracted_driving
## distracted_driving 43
## leaving_scene_of_an_accident_informing 1
## reckless_driving 2
## running_red_light_or_without_headlight 9
## without_proper_documents_or_vehicle_defects 3
## Reference
## Prediction leaving_scene_of_an_accident_informing
## distracted_driving 0
## leaving_scene_of_an_accident_informing 16
## reckless_driving 0
## running_red_light_or_without_headlight 0
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction reckless_driving
## distracted_driving 38
## leaving_scene_of_an_accident_informing 7
## reckless_driving 51
## running_red_light_or_without_headlight 40
## without_proper_documents_or_vehicle_defects 12
## Reference
## Prediction running_red_light_or_without_headlight
## distracted_driving 30
## leaving_scene_of_an_accident_informing 6
## reckless_driving 45
## running_red_light_or_without_headlight 35
## without_proper_documents_or_vehicle_defects 8
## Reference
## Prediction without_proper_documents_or_vehicle_defects
## distracted_driving 193
## leaving_scene_of_an_accident_informing 20
## reckless_driving 184
## running_red_light_or_without_headlight 80
## without_proper_documents_or_vehicle_defects 47
##
## Overall Statistics
##
## Accuracy : 0.2207
## 95% CI : (0.1935, 0.2497)
## No Information Rate : 0.6023
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0779
##
## Mcnemar's Test P-Value : <2e-16
##
## Statistics by Class:
##
## Class: distracted_driving
## Sensitivity 0.74138
## Specificity 0.67857
## Pos Pred Value 0.14145
## Neg Pred Value 0.97350
## Prevalence 0.06667
## Detection Rate 0.04943
## Detection Prevalence 0.34943
## Balanced Accuracy 0.70998
## Class: leaving_scene_of_an_accident_informing
## Sensitivity 1.00000
## Specificity 0.96019
## Pos Pred Value 0.32000
## Neg Pred Value 1.00000
## Prevalence 0.01839
## Detection Rate 0.01839
## Detection Prevalence 0.05747
## Balanced Accuracy 0.98009
## Class: reckless_driving
## Sensitivity 0.34459
## Specificity 0.68006
## Pos Pred Value 0.18085
## Neg Pred Value 0.83503
## Prevalence 0.17011
## Detection Rate 0.05862
## Detection Prevalence 0.32414
## Balanced Accuracy 0.51232
## Class: running_red_light_or_without_headlight
## Sensitivity 0.28226
## Specificity 0.82708
## Pos Pred Value 0.21341
## Neg Pred Value 0.87394
## Prevalence 0.14253
## Detection Rate 0.04023
## Detection Prevalence 0.18851
## Balanced Accuracy 0.55467
## Class: without_proper_documents_or_vehicle_defects
## Sensitivity 0.08969
## Specificity 0.93353
## Pos Pred Value 0.67143
## Neg Pred Value 0.40375
## Prevalence 0.60230
## Detection Rate 0.05402
## Detection Prevalence 0.08046
## Balanced Accuracy 0.51161
get.macro.f1 <- function(cm) {
c <- cm$byClass # a single matrix is sufficient
re <- sum(c[, "Recall"]) / nrow(c)
pr <- sum(c[, "Precision"]) / nrow(c)
f1 <- 2 * ((re * pr) / (re + pr))
ac<-cm$overall["Accuracy"]
f2<-c(ac,re,pr,f1)
return(f2)
}
macro.nn <- get.macro.f1(result)
Knn
ctrl <- trainControl(method = "repeatedcv",
repeats = 3,
sampling = "up")
set.seed(42)
model_rf_over <- caret::train(model,
data = az_me_age_model,
method = "knn",
preProcess = c("scale", "center"),
tuneLength = 20,
trControl = ctrl)
x<-predict(model_rf_over, newdata = az_me_age_model, type = "raw")
result<-confusionMatrix(x,az_me_age_model$viotype)
result
## Confusion Matrix and Statistics
##
## Reference
## Prediction distracted_driving
## distracted_driving 57
## leaving_scene_of_an_accident_informing 0
## reckless_driving 0
## running_red_light_or_without_headlight 1
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction leaving_scene_of_an_accident_informing
## distracted_driving 0
## leaving_scene_of_an_accident_informing 16
## reckless_driving 0
## running_red_light_or_without_headlight 0
## without_proper_documents_or_vehicle_defects 0
## Reference
## Prediction reckless_driving
## distracted_driving 18
## leaving_scene_of_an_accident_informing 6
## reckless_driving 93
## running_red_light_or_without_headlight 20
## without_proper_documents_or_vehicle_defects 11
## Reference
## Prediction running_red_light_or_without_headlight
## distracted_driving 9
## leaving_scene_of_an_accident_informing 4
## reckless_driving 14
## running_red_light_or_without_headlight 93
## without_proper_documents_or_vehicle_defects 4
## Reference
## Prediction without_proper_documents_or_vehicle_defects
## distracted_driving 103
## leaving_scene_of_an_accident_informing 16
## reckless_driving 122
## running_red_light_or_without_headlight 124
## without_proper_documents_or_vehicle_defects 159
##
## Overall Statistics
##
## Accuracy : 0.4805
## 95% CI : (0.4468, 0.5143)
## No Information Rate : 0.6023
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.3344
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: distracted_driving
## Sensitivity 0.98276
## Specificity 0.83990
## Pos Pred Value 0.30481
## Neg Pred Value 0.99854
## Prevalence 0.06667
## Detection Rate 0.06552
## Detection Prevalence 0.21494
## Balanced Accuracy 0.91133
## Class: leaving_scene_of_an_accident_informing
## Sensitivity 1.00000
## Specificity 0.96956
## Pos Pred Value 0.38095
## Neg Pred Value 1.00000
## Prevalence 0.01839
## Detection Rate 0.01839
## Detection Prevalence 0.04828
## Balanced Accuracy 0.98478
## Class: reckless_driving
## Sensitivity 0.6284
## Specificity 0.8116
## Pos Pred Value 0.4061
## Neg Pred Value 0.9142
## Prevalence 0.1701
## Detection Rate 0.1069
## Detection Prevalence 0.2632
## Balanced Accuracy 0.7200
## Class: running_red_light_or_without_headlight
## Sensitivity 0.7500
## Specificity 0.8056
## Pos Pred Value 0.3908
## Neg Pred Value 0.9509
## Prevalence 0.1425
## Detection Rate 0.1069
## Detection Prevalence 0.2736
## Balanced Accuracy 0.7778
## Class: without_proper_documents_or_vehicle_defects
## Sensitivity 0.3034
## Specificity 0.9566
## Pos Pred Value 0.9138
## Neg Pred Value 0.4756
## Prevalence 0.6023
## Detection Rate 0.1828
## Detection Prevalence 0.2000
## Balanced Accuracy 0.6300
get.macro.f1 <- function(cm) {
c <- cm$byClass # a single matrix is sufficient
re <- sum(c[, "Recall"]) / nrow(c)
pr <- sum(c[, "Precision"]) / nrow(c)
f1 <- 2 * ((re * pr) / (re + pr))
ac<-cm$overall["Accuracy"]
f2<-c(ac,re,pr,f1)
return(f2)
}
macro.knn <- get.macro.f1(result)
rbind(macro.mul,macro.dt,macro.rand,macro.nn,macro.knn)
## Accuracy
## macro.mul 0.3229885 0.3977002 0.2834443 0.3309896
## macro.dt 0.5425287 0.7133507 0.4818621 0.5751891
## macro.rand 0.8310345 0.9040920 0.7236055 0.8038422
## macro.nn 0.2206897 0.4915853 0.3054283 0.3767667
## macro.knn 0.4804598 0.7329144 0.4792856 0.5795666
Map visualizations
Model Output with max probability violation
#Importing map for mesa city
register_google(key = "AIzaSyCvgTagZzhtN_1FiBYEQy29kgkSYgxEAao", write = TRUE)
mesa <- ggmap(get_map(c(left=-111.9363,bottom=33.27729,right=-111.5822,top=33.49722)))
#Predicted probability percentages for getting arrested for different violation types at different locations.
x<-predict(model_rf_over_rf, newdata = az_me_age_model, type = "prob")
#combining predicted values with original data set.
n<-nrow(x)
x$obs<-1:n
x_long <- x %>% pivot_longer(-obs,
names_to = "violation_type",
values_to = "prob")
x_max <- x_long %>%
group_by(obs) %>%
summarise(max_prob = max(prob),
violation_type = violation_type[which(prob == max_prob)])
az_me_age_model$obs<-c(1:n)
az_me_age_model<-inner_join(az_me_age_model,x_max,by="obs")
az_me_age_model_shiny<-inner_join(az_me_age_model,x_long,by="obs")
az_me_age_model <- az_me_age_model %>%
arrange(obs) %>%
mutate( violation_type=factor(violation_type, unique(violation_type))) %>%
mutate( mytext=paste(
"Violation Type: ", violation_type, "\n",
"Percentage of getting arrested for this violation: ", formatC(max_prob*100), sep="")
)
shiny_data<- az_me_age_model_shiny %>% group_by(violation_type.y,subject_race,subject_sex) %>% distinct()
shiny_data$violation_type<- as.factor(shiny_data$violation_type.y)
shiny_data<-shiny_data[,c("subject_age","subject_race","subject_sex","lng","lat","violation_type","prob")]
shiny_data<-unique(shiny_data)
write.csv(shiny_data,"~/shiny_data.csv", row.names = FALSE)
# Make the map (static)
p <- mesa +
geom_point(
aes(
x = lng,
y = lat,
text = mytext,
color =violation_type
),
data = az_me_age_model
) +
theme_void() +
ggtitle("Probability of arrest violation types across the MESA(AZ state)")+
theme_bw() + theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
panel.border = element_blank())
p <- ggplotly(p,tooltip = "text")
p
Heat map for number of arrests which are predicted from the model
az_me_age_model$lon<-az_me_age_model$lng
mesa +
coord_equal() +
xlab('Longitude') +
ylab('Latitude') +
stat_density2d(aes(fill = ..level..), alpha = .3,
geom = "polygon", data = az_me_age_model,show.legend=TRUE) +
scale_fill_viridis_c() +
theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())+ labs(fill = "Number of Arrest Cases")+
facet_wrap(~violation_type)
